# -*- coding: utf-8 -*-
import scrapy
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors import LinkExtractor
from scrapy import Request


from fifacrawl.items import PlayerIdItem


class PlayerIdSpider(CrawlSpider):
  """Crawl the FIFA World Cup archive and collect player ids.

  Flow: tournament archive index -> each edition's index page
  (matched by ``rules``) -> that edition's teams page -> each team's
  archived player list -> one ``PlayerIdItem`` per player found.
  """

  name = 'playerid'
  allowed_domains = ['fifa.com']
  start_urls = [
    'http://www.fifa.com/fifa-tournaments/archive/worldcup/index.html'
  ]

  # Raw string so the escaped dot is a literal regex escape (a plain
  # string with '\.' is a deprecated/invalid escape in Python 3), and
  # {4} instead of the redundant {4,4}.  Matches e.g.
  # /worldcup/archive/brazil2014/index.html
  rules = [
      Rule(LinkExtractor(allow=(r'/worldcup/archive/[a-z]*[0-9]{4}/index\.html', )),
           callback='parse_wc')
  ]

  def parse_wc(self, response):
    """From an edition's index page, request its teams listing."""
    wcurl = response.url.replace('index.html', 'teams/index.html')
    return Request(wcurl, callback=self.parse_team)

  def parse_team(self, response):
    """Request the archived player-list page for every team.

    Each team link points into the competition folder; rewrite it to
    the archive's ``edition=<n>/library`` layout using the folder name
    and edition number published in the page's <meta> tags.
    """
    competitionfolder = response.xpath(
        '/html/head/meta[@name="competition-folder"]/@content').extract()[0].strip()
    competitiontimelineedition = response.xpath(
        '/html/head/meta[@name="competition-timeline-edition"]/@content').extract()[0].strip()
    for href in response.xpath('//a[@class="team"]/@href'):
      url = href.extract()
      url = url.replace('index.html', '_players/_players_list.html')
      url = url.replace(
          competitionfolder,
          'worldcup/archive/edition=%s/library' % competitiontimelineedition)
      yield Request('http://www.fifa.com' + url, callback=self.parse_player)

  def parse_player(self, response):
    """Return a PlayerIdItem for each data-player-id on the page."""
    items = []
    for node in response.xpath('//div[@class="p p-i-no"]/@data-player-id'):
      pid = PlayerIdItem()
      pid['pid'] = int(node.extract())
      items.append(pid)
    return items




